While investigating some discrepancies among different datasets a handful of corrections were made. This document is intended to see if any of those changes have important implications for some of the species-specific reports that have been done recently.
The two datasets being compared here are the survdat_Nye_allseasons.RData and the more recent NEFSC_BTS_all_seasons_03032021.RData.
# Cleanup functions
source(here("R/01_nefsc_ss_build.R"))
#### Survdat resource Load Data
# Data used in 2020 paper
load(paste0(nmfs_path, "Survdat_Nye_allseason.RData"))
survdat_nye <- survdat %>% clean_names()
# Run cleanup
survdat_nye <- survdat_prep(survdat = survdat_nye) %>%
mutate(survdat_source = "survdat_nye")
# Data we just received in 2021 with errors located and corrected
load(paste0(nmfs_path, "NEFSC_BTS_all_seasons_03032021.RData"))
survdat_21 <- survey$survdat %>% clean_names()
# Run cleanup
survdat_21 <- survdat_prep(survdat = survdat_21) %>%
mutate(survdat_source = "survdat_2021")
rm(survdat, survey)
#### Load the species list
species_check <- read_csv(here("data/andrew_species/Assesmentfishspecies.csv"))
species_check <- species_check %>%
clean_names() %>%
mutate(svspp = str_pad(svspp, 3, pad = "0", side = "left"),
comname = tolower(comname),
species = str_to_title(species)) %>%
arrange(svspp)
# Make years comparable
survdat_21 <- survdat_21 %>%
filter(est_year %in% unique(survdat_nye$est_year))
# Filter species down for both
survdat_nye <- filter(survdat_nye, svspp %in% species_check$svspp)
survdat_21 <- filter(survdat_21, svspp %in% species_check$svspp)
# Do some gross abundance or biomass comparisons by species
species_nye <- survdat_nye %>% split(.$comname)
species_21 <- survdat_21 %>% split(.$comname)
# Make Comparisons
# Vector of species and their common names, atleast the common names Andrew used
andrew_species <- species_check$comname %>% setNames(species_check$svspp)
# Pull the species
species_comparisons <- imap(andrew_species, function(species_comname, species_svspp){
# there are some name mismatches, so catch those
in_nye <- species_comname %in% names(species_nye)
in_21 <- species_comname %in% names(species_21)
in_both <- in_nye & in_21
if(in_both == FALSE){
return(list("in_nye" = in_nye,
"in_21" = in_21,
"data" = data.frame(),
"metrics" = data.frame()))
}
# Pull just that species from the old data
old_data_summ <- survdat_nye %>%
filter(svspp == species_svspp) %>%
group_by(comname, est_year) %>%
summarise(old_abundance = sum(abundance, na.rm = T),
old_biomass = sum(biom_adj, na.rm = T),
.groups = "keep")
# Pull just that species from the new data
new_data_summ <- survdat_21 %>%
filter(svspp == species_svspp) %>%
group_by(comname, est_year) %>%
summarise(new_abundance = sum(abundance, na.rm = T),
new_biomass = sum(biom_adj, na.rm = T),
.groups = "keep")
# join them for side-by-side data
combined_data <- left_join(old_data_summ, new_data_summ, by = c("comname", "est_year")) %>%
mutate(relative_abund_change = ((new_abundance - old_abundance) / old_abundance) * 100,
relative_biom_change = ((new_biomass - old_biomass) / old_biomass) * 100)
# correlation
abund_cor <- cor(combined_data$new_abundance, combined_data$old_abundance, use = "pairwise.complete.obs")
biomass_cor <- cor(combined_data$new_biomass, combined_data$old_biomass, use = "pairwise.complete.obs")
# average relative shift
abund_shift <- mean(combined_data$relative_abund_change, na.rm = T)
biom_shift <- mean(combined_data$relative_biom_change, na.rm = T)
# put in list to export
list("data" = combined_data,
"metrics" = data.frame("comname" = species_comname,
"svspp" = species_svspp,
"abundance_correlation" = abund_cor,
"biomass_correlation" = biomass_cor,
"abundance_percent_change" = abund_shift,
"biomass_percent_change" = biom_shift))
})
# put data and metrics into a table
comparison_data <- map_dfr(species_comparisons, ~.x[["data"]])
comparison_metrics <- map_dfr(species_comparisons, ~.x[["metrics"]])
comparison_metrics %>%
mutate(comname = fct_reorder(comname, abundance_correlation, max, .desc = F)) %>%
ggplot(aes(x = abundance_correlation, y = comname)) +
geom_segment(aes(xend = 0, yend = comname), color = gmri_cols("gmri blue")) +
geom_point(color = gmri_cols("gmri blue")) +
labs(y = "", x = "Correlation Between Total Annual Abundances")
comparison_metrics %>%
mutate(comname = fct_reorder(comname, abundance_percent_change, max, .desc = F)) %>%
ggplot(aes(x = abundance_percent_change, y = comname)) +
geom_segment(aes(xend = 0, yend = comname), color = gmri_cols("gmri blue")) +
geom_point(color = gmri_cols("gmri blue")) +
scale_x_continuous(labels = scales::percent_format()) +
labs(y = "", x = "Avg. Percent Change in Annual Abundance\nMoving from Old Data -> New Data")
# correlation
corr_cutoff <- 0.8
corr_species <- comparison_metrics %>%
filter(abundance_correlation <= corr_cutoff) %>%
mutate(comname = fct_drop(comname)) %>%
pull(comname)
# filter species
corr_sub <- comparison_data %>%
filter(comname %in% corr_species)
# how many species per plot
p1 <- corr_species[1:6]
p2 <- corr_species[-c(1:6)]
# p1
corr_sub %>%
filter(comname %in% p1) %>%
mutate(comname = fct_drop(comname)) %>%
ggplot() +
geom_line(aes(est_year, old_abundance, color = "Survdat Nye")) +
geom_line(aes(est_year, new_abundance, color = "Newest Survdat")) +
facet_wrap(~comname, ncol = 1, scales = "free" ) +
scale_y_continuous(labels = scales::comma_format()) +
scale_color_gmri() +
labs(x = "",
y = "Annual Abundance",
title = paste0("Species with Annual Abundance Correlation <= ", corr_cutoff),
color = "Survdat Source",
subtitle = "Subset 1")
# p1
corr_sub %>%
filter(comname %in% p2) %>%
mutate(comname = fct_drop(comname)) %>%
ggplot() +
geom_line(aes(est_year, old_abundance, color = "Survdat Nye")) +
geom_line(aes(est_year, new_abundance, color = "Newest Survdat")) +
facet_wrap(~comname, ncol = 1, scales = "free" ) +
scale_y_continuous(labels = scales::comma_format()) +
scale_color_gmri() +
labs(x = "",
y = "Annual Abundance",
title = paste0("Species with Annual Abundance Correlation <= ", corr_cutoff),
color = "Survdat Source",
subtitle = "Subset 2")
# percent change
perc_cutoff <- 25
perc_species <- comparison_metrics %>%
filter(abundance_percent_change >= perc_cutoff) %>%
mutate(comname = fct_drop(comname)) %>%
pull(comname)
comparison_data %>%
filter(comname %in% perc_species) %>%
mutate(comname = fct_drop(comname)) %>%
ggplot() +
geom_line(aes(est_year, old_abundance, color = "Survdat Nye")) +
geom_line(aes(est_year, new_abundance, color = "Newest Survdat")) +
facet_wrap(~comname, ncol = 1, scales = "free") +
scale_y_continuous(labels = scales::comma_format()) +
scale_color_gmri() +
labs(x = "",
y = "Annual Abundance",
title = paste0("Species with Avg. Shift in Abundance >= ", perc_cutoff, "%"),
color = "Survdat Source")
comparison_metrics %>%
mutate(comname = fct_reorder(comname, biomass_correlation, max, .desc = F)) %>%
ggplot(aes(x = biomass_correlation, y = comname)) +
geom_segment(aes(xend = 0, yend = comname), color = gmri_cols("gmri blue")) +
geom_point(color = gmri_cols("gmri blue")) +
labs(y = "", x = "Correlation Between Total Annual Biomasses")
comparison_metrics %>%
mutate(comname = fct_reorder(comname, biomass_percent_change, max, .desc = F)) %>%
ggplot(aes(x = biomass_percent_change, y = comname)) +
geom_segment(aes(xend = 0, yend = comname), color = gmri_cols("gmri blue")) +
geom_point(color = gmri_cols("gmri blue")) +
labs(y = "", x = "Avg. Percent Change in Annual Biomasses\nMoving from Old Data -> New Data")
# correlation
corr_cutoff <- 0.8
corr_species <- comparison_metrics %>%
filter(biomass_correlation <= corr_cutoff) %>%
mutate(comname = fct_drop(comname)) %>%
pull(comname)
# plot
comparison_data %>%
filter(comname %in% corr_species) %>%
mutate(comname = fct_drop(comname)) %>%
ggplot() +
geom_line(aes(est_year, old_biomass, color = "Survdat Nye")) +
geom_line(aes(est_year, new_biomass, color = "Newest Survdat")) +
facet_wrap(~comname, ncol = 1, scales = "free" ) +
scale_y_continuous(labels = scales::comma_format()) +
scale_color_gmri() +
labs(x = "",
y = "Annual Biomass",
title = paste0("Species with Annual Biomass Correlation <= ", corr_cutoff),
color = "Survdat Source")
# percent change
perc_cutoff <- 25
perc_species <- comparison_metrics %>%
filter(biomass_percent_change >= perc_cutoff) %>%
mutate(comname = fct_drop(comname)) %>%
pull(comname)
comparison_data %>%
filter(comname %in% perc_species) %>%
mutate(comname = fct_drop(comname)) %>%
ggplot() +
geom_line(aes(est_year, old_biomass, color = "Survdat Nye")) +
geom_line(aes(est_year, new_biomass, color = "Newest Survdat")) +
facet_wrap(~comname, ncol = 1, scales = "free") +
scale_y_continuous(labels = scales::comma_format()) +
scale_color_gmri() +
labs(x = "",
y = "Annual Biomass",
title = paste0("Species with Avg. Shift in Biomass >= ", perc_cutoff, "%"),
color = "Survdat Source")
A work by Adam A. Kemberling
Akemberling@gmri.org